clear all

*********************************
***STUDY 1: NIGERIA 1
*********************************
* Cleans the raw Nigeria 1 survey and saves a file holding only the
* variables shared by every study, so that the studies can be appended later.

cd "$directory"

use "01_RawData\raw data Nigeria 1.dta", clear

*Rename and generation of key variables
rename a03_female female
rename a02_age age

* Education on the common 0-8 scale (see value label edu below).
* NOTE(review): integer raw codes 7, 10, 15 and 16 are matched by none of the
* rules below and therefore stay missing - confirm against the codebook.
gen education=.
replace education=0 if a12_highest_educ==1
replace education=1 if a12_highest_educ>1 & a12_highest_educ<7 
replace education=2 if a12_highest_educ==8
replace education=3 if a12_highest_educ>8 & a12_highest_educ<10 
replace education=4 if a12_highest_educ==11
replace education=5 if a12_highest_educ>11 & a12_highest_educ<14 
replace education=6 if a12_highest_educ==14
replace education=7 if a12_highest_educ==17 
* missing() also excludes extended missing values (.a-.z), which compare as
* larger than any number and would otherwise be coded as 8.
replace education=8 if a12_highest_educ>17 & !missing(a12_highest_educ)

lab def edu 0 "No education" 1 "Primary incomplete" 2 "Primary complete" 3 "Junior High incomplete" 4 "Junior High complete" 5 "High school incomplete" 6 "High school complete" 7 "Higher Education incomplete" 8 "Higher Education Complete"

lab val education edu


*Dummy for those who completed junior high school or above.
gen heduc=0 if education!=.
replace heduc=1 if education>=4 & education!=.

lab var age "Age, in years"
lab var female "=1 if subjects is female, 0 other"
lab var heduc "=1 if participant has junior high school or more"
lab var education "Highest education level"

*Specify the name of the study
gen data="Nigeria1"
lab var data "Study"

*Outcome variables related to subjective and objective probabilities
*(note the mapping: el3/el4 become pr5/pr6 and el5/el6 become pr3/pr4)
gen pr5=el3
gen pr6=el4
gen pr1=el1
gen pr2=el2
gen pr3=el5
gen pr4=el6



lab var pr1 "Basket with 5 oranges (1 green, 4 yellow): likelihood of picking yellow"
lab var pr2 "Basket with 10 oranges (1 green, 9 yellow): likelihood of picking green"
lab var pr5 "Likelihood of attending mosque/church centre in the next 2 days"
lab var pr6 "Likelihood of attending mosque/church centre in the next 2 weeks"
lab var pr3 "Likelihood that respondent or household member will not eat next month"
lab var pr4 "Likelihood that respondent or household member will wash clothes next month"


*Treatment variable
tab slider

*We generate the enumerator code as study name + enumerator code within the study, so that codes stay distinct once all studies are appended
egen code_enum=concat(data bg_enum2)
tab code_enum

lab var code_enum "Enumerator ID"

*Time variables
* Each timestamp string is split into at most two pieces; the second piece is
* the one parsed as hours:minutes:seconds below.

split start_d_bef_e, generate(s) limit(2)
split start_d_aft_e, generate(ss) limit(2)


split end_d_bef_e, generate(e) limit(2)
split end_d_aft_e, generate(ee) limit(2)


* d_before selects which pair of timestamps applies: the bef variables when
* d_before is 1, the aft variables when it is 0.
generate double start_d= clock(s2, "hms") if d_before==1
replace start_d= clock(ss2, "hms") if d_before==0
format start_d %tc

generate double end_d= clock(e2, "hms") if d_before==1
replace end_d= clock(ee2, "hms") if d_before==0
format end_d %tc

drop s1 s2 ss1 ss2 e1 e2 ee1 ee2 

* The interactive browse call that used to sit here was removed so that the
* do-file can run unattended.

* clock() returns milliseconds, so dividing by 60000 gives minutes.
gen time_d=(end_d-start_d)/60000

sum time_d
lab var time_d "time (minutes)"
* Durations above 50 minutes are set to missing.
replace time_d=. if time_d>50 & !missing(time_d)
sum time_d


*id code
gen id_data=_n
* Built from id_data, as in the other studies.
egen id_unique=concat(data id_data)

lab var id_data "Unique id in the study"
lab var id_unique "Unique id across all studies"


*We keep only the relevant variables for this study, with the aim of generating one dataset with all the studies
keep female age education heduc data pr1 pr2 pr3 pr4 pr5 pr6 time_d code_enum slider id_data id_unique

save "02_ProcessedData\Data_Nig1_clean.dta", replace



*********************************
***STUDY 2: NIGERIA 2
*********************************
clear all

* Cleans the raw Nigeria 2 survey and saves a file holding only the
* variables shared by every study.

use "01_RawData\raw data Nigeria 2.dta", clear

*Rename and generation of key variables
rename a03_female female
rename a02_age age

* Education on the common 0-8 scale (see value label edu below).
* NOTE(review): integer raw codes 6 and 14 are matched by none of the rules
* below and therefore stay missing - confirm against the codebook.
gen education=.
replace education=0 if a10_high_level_educ==0
replace education=1 if a10_high_level_educ>0 & a10_high_level_educ<6 
replace education=2 if a10_high_level_educ==7
replace education=3 if a10_high_level_educ>7 & a10_high_level_educ<10 
replace education=4 if a10_high_level_educ==10
replace education=5 if a10_high_level_educ>10 & a10_high_level_educ<13 
replace education=6 if a10_high_level_educ==13
replace education=6 if a10_high_level_educ==15
replace education=6 if a10_high_level_educ==16
replace education=7 if a10_high_level_educ==17 
* missing() also excludes extended missing values (.a-.z), which compare as
* larger than any number and would otherwise be coded as 8.
replace education=8 if a10_high_level_educ>17 & !missing(a10_high_level_educ)

lab def edu 0 "No education" 1 "Primary incomplete" 2 "Primary complete" 3 "Junior High incomplete" 4 "Junior High complete" 5 "High school incomplete" 6 "High school complete" 7 "Higher Education incomplete" 8 "Higher Education Complete"

lab val education edu


*Dummy for those who completed junior high school or above.
gen heduc=0 if education!=.
replace heduc=1 if education>=4 & education!=.


lab var age "Age, in years"
lab var female "=1 if subjects is female, 0 other"
* The cutoff above is Junior High complete, so the label says junior high.
lab var heduc "=1 if participant has junior high school or more"
lab var education "Highest education level"


*Specify the name of the study
gen data="Nigeria2"
lab var data "Study"

*Outcome variables related to subjective and objective probabilities
*(note the mapping: el3/el4 become pr5/pr6 and el5/el6 become pr3/pr4)
gen pr5=el3
gen pr6=el4
gen pr1=el1
gen pr2=el2
gen pr3=el5
gen pr4=el6

lab var pr1 "Basket with 5 oranges (1 green, 4 yellow): likelihood of picking yellow"
lab var pr2 "Basket with 10 oranges (1 green, 9 yellow): likelihood of picking green"
lab var pr5 "Likelihood of attending mosque/church centre in the next 2 days"
lab var pr6 "Likelihood of attending mosque/church centre in the next 2 weeks"
lab var pr3 "Likelihood that respondent or household member will not eat next month"
lab var pr4 "Likelihood that respondent or household member will wash clothes next month"


*Treatment variable
tab slider

*We generate the enumerator code as study name + enumerator code, so that codes stay distinct once all studies are appended
egen code_enum=concat(data bg_enum2)
tab code_enum

lab var code_enum "Enumerator ID"

*Time variables
* Each timestamp string is split into at most two pieces; the second piece is
* the one parsed as hours:minutes:seconds below.

split start_d_before_e, generate(s) limit(2)
split start_d_after_e, generate(ss) limit(2)


split end_d_before_e, generate(es) limit(2)
split end_d_after_e, generate(ee) limit(2)


* d_before selects which pair of timestamps applies: the before variables
* when d_before is 1, the after variables when it is 0.
generate double start_d= clock(s2, "hms") if d_before==1
replace start_d= clock(ss2, "hms") if d_before==0
format start_d %tc

generate double end_d= clock(es2, "hms") if d_before==1
replace end_d= clock(ee2, "hms") if d_before==0
format end_d %tc

drop s1 s2 ss1 ss2 es1 es2 ee1 ee2 


* clock() returns milliseconds, so dividing by 60000 gives minutes.
gen time_d=(end_d-start_d)/60000

sum time_d
lab var time_d "time (minutes)"
* Durations above 60 minutes are set to missing.
replace time_d=. if time_d>60 & !missing(time_d)
sum time_d


*id code
gen id_data=_n
egen id_unique=concat(data id_data)

lab var id_data "Unique id in the study"
lab var id_unique "Unique id across all studies"


*Keep only the variables shared by all studies
keep female age education heduc data pr1 pr2 pr3 pr4 pr5 pr6 time_d code_enum slider id_data id_unique



save "02_ProcessedData\Data_Nig2_clean.dta", replace


*********************************
***STUDY 3: HONDURAS
*********************************
clear all

* Cleans the raw Honduras survey and saves a file holding only the
* variables shared by every study.

use "01_RawData\raw data Honduras.dta", clear

*Labels in English

label var A02 "Enumerator code"
label var idsurvey "Survey code"
label var A04 "School"
label var A05 "Relationship of respondent to the child"

label var edad_enc "Age"
label var A07 "Gender (1 = male, 2 = female)"

label var timeEXP1 "Time when expectations section starts"
label var timeEXP2 "Time when expectations section ends"
label var timeexp1 "Time EXP1 (format h:min:s)"
label var timeexp2 "Time EXP2 (format h:min:s)"

label var el0 "Likelihood based on today's weather"
label var el1 "Basket with 5 apples (4 red, 1 green): likelihood of picking the green apple"
label var el2 "Basket with 10 apples (9 red, 1 green): likelihood of picking the green apple"
label var el3 "Likelihood of eating chicken in the next 2 days"
label var el4 "Likelihood of eating chicken in the next 2 weeks"
label var el5 "Likelihood that respondent or household member will not eat next month"
label var el6 "Likelihood that clothes will be washed in the next month"

label var edu_parents "Respondent education"
label var etnia "Respondent ethnicity"
label var otraetnia "Other ethnicity"

label var last7days "Household experienced food shortage in the last 7 days"

label var codesc "School code"
label var clus "Socioeconomic cluster of the school zone"

* ----------------------------
* Derived variables
* ----------------------------
label var female "Female"



*Treatment variable
* treatment_real becomes the common slider variable; the assigned treatment
* is kept under its own name until the final keep.

rename treatment_real slider
rename treatment itt_hon

lab var slider "=1 if slider was used, 0 if beans were used"
lab var itt_hon "treatment assigment, 1=slider 0=beans" /*one enumerator used slider instead of beans/beads n=29*/


*Rename and generation of key variables
rename edad_enc age

* Education on the common 0-8 scale (see value label edu below).
* NOTE(review): raw codes 2 and 6 are matched by none of the rules below and
* therefore stay missing. The Nigeria recodes start the Primary incomplete
* range right after the No education code, so the lower bound >2 here may
* have been meant as >1 - confirm against the codebook before changing it.
gen education=.
replace education=0 if edu_parents==1
replace education=1 if edu_parents>2 & edu_parents<6 
replace education=2 if edu_parents==7
replace education=3 if edu_parents>7 & edu_parents<10 
replace education=4 if edu_parents==10
replace education=5 if edu_parents>10 & edu_parents<13 
replace education=6 if edu_parents==13

replace education=7 if edu_parents==14
replace education=7 if edu_parents==16 
replace education=8 if edu_parents==15 | edu_parents==17 | edu_parents==18 | edu_parents==19 

lab def edu 0 "No education" 1 "Primary incomplete" 2 "Primary complete" 3 "Junior High incomplete" 4 "Junior High complete" 5 "High school incomplete" 6 "High school complete" 7 "Higher Education incomplete" 8 "Higher Education Complete"

lab val education edu

*Dummy for those who completed junior high school or above.
gen heduc=0 if education!=.
replace heduc=1 if education>=4 & education!=.


lab var age "Age, in years"
lab var female "=1 if subjects is female, 0 other"
* The cutoff above is Junior High complete, so the label says junior high.
lab var heduc "=1 if participant has junior high school or more"
lab var education "Highest education level"

*Specify the name of the study

gen data="Honduras"
lab var data "Study"


*id code
* NOTE(review): the other studies build id_data as a numeric row counter;
* confirm idsurvey is numeric too, since append needs matching storage types.

rename idsurvey id_data
egen id_unique=concat(data id_data)

lab var id_data "Unique id in the study"
lab var id_unique "Unique id across all studies"

*Outcome variables related to subjective and objective probabilities
*(note the mapping: el3/el4 become pr5/pr6 and el5/el6 become pr3/pr4)
gen pr5=el3
gen pr6=el4
gen pr1=el1
gen pr2=el2
gen pr3=el5
gen pr4=el6

* Labels repeat the wording of the Honduras source questions el1-el6 labelled
* above (apples and eating chicken).
lab var pr1 "Basket with 5 apples (4 red, 1 green): likelihood of picking the green apple"
lab var pr2 "Basket with 10 apples (9 red, 1 green): likelihood of picking the green apple"
lab var pr5 "Likelihood of eating chicken in the next 2 days"
lab var pr6 "Likelihood of eating chicken in the next 2 weeks"
lab var pr3 "Likelihood that respondent or household member will not eat next month"
lab var pr4 "Likelihood that clothes will be washed in the next month"

*We generate the enumerator code as study name + enumerator code, so that codes stay distinct once all studies are appended
egen code_enum=concat(data A02)
tab code_enum

lab var code_enum "Enumerator ID"


*Time variables
* timeexp1 and timeexp2 are parsed as hours:minutes:seconds.

generate double start_d= clock(timeexp1, "hms")


generate double end_d= clock(timeexp2, "hms")

* clock() returns milliseconds, so dividing by 60000 gives minutes.
gen time_d=(end_d-start_d)/60000

sum time_d
lab var time_d "time (minutes)"
* Negative durations and durations above 50 minutes are set to missing.
replace time_d=. if time_d<0
replace time_d=. if time_d>50 & !missing(time_d)

sum time_d


*Keep only the variables shared by all studies
keep female age education heduc data pr1 pr2 pr3 pr4 pr5 pr6 time_d code_enum slider id_data id_unique

save "02_ProcessedData\Data_Hon_clean.dta", replace


*********************************
***STUDY 4: ANDALUCÍA
*********************************
clear all

* Cleans the raw Andalucia survey and saves a file with the variables shared
* by every study, plus the study-specific education variable.

use "01_RawData\raw data Andalucia.dta", clear


*Rename and generation of key variables

rename s1 age
rename s2 female
rename s9 education_andalucia /*we define another education variable since it is not comparable with the questions of the others studies*/

*Dummy for those who have junior high school or above. In this case, we use the incomplete ESO in order to make it consistent with other studies.
* missing() also excludes extended missing values (.a-.z), which compare as
* larger than any number and would otherwise be coded as heduc=1.
gen heduc=0 if !missing(education_andalucia)
replace heduc=1 if education_andalucia>=2 & !missing(education_andalucia)

lab var age "Age, in years"
lab var female "=1 if subjects is female, 0 other"
* Label follows the cutoff described in the comment above.
lab var heduc "=1 if participant has junior high school or more"
* The full variable name is spelled out so this does not depend on variable
* abbreviation being switched on.
lab var education_andalucia "Highest education level"

*Treatment variable
tab slider
lab var slider "=1 if slider was used, 0 if beans were used"

*Specify the name of the study
gen data="Andalusia"
lab var data "Study"

*Outcome variables related to subjective and objective probabilities
*(note the mapping: el3/el4 become pr5/pr6 and el5/el6 become pr3/pr4)
* NOTE(review): the label wording below is identical to the Nigeria studies;
* confirm it matches the questions actually asked in Andalucia.

gen pr5=el3
gen pr6=el4
gen pr1=el1
gen pr2=el2
gen pr3=el5
gen pr4=el6

lab var pr1 "Basket with 5 oranges (1 green, 4 yellow): likelihood of picking yellow"
lab var pr2 "Basket with 10 oranges (1 green, 9 yellow): likelihood of picking green"
lab var pr5 "Likelihood of attending mosque/church centre in the next 2 days"
lab var pr6 "Likelihood of attending mosque/church centre in the next 2 weeks"
lab var pr3 "Likelihood that respondent or household member will not eat next month"
lab var pr4 "Likelihood that respondent or household member will wash clothes next month"



*We generate the enumerator code as study name + enumerator code, so that codes stay distinct once all studies are appended
egen code_enum=concat(data elquelee)
tab code_enum

lab var code_enum "Enumerator ID"



*id code
gen id_data=_n
egen id_unique=concat(data id_data)

lab var id_data "Unique id in the study"
lab var id_unique "Unique id across all studies"


*Time
* hms() returns milliseconds; the values are stored as double because float
* cannot hold large millisecond counts exactly.
gen double start_d=hms( horaini, minutosini, segundosini )
gen double end_d=hms( hora, minutos, segundos)
gen time_d=(end_d-start_d)/60000

sum time_d
lab var time_d "time (minutes)"
* Durations above 40 minutes and negative durations are set to missing.
replace time_d=. if time_d>40 & !missing(time_d)
replace time_d=. if time_d<0

sum time_d
* education_andalucia is kept under its full name; it is the only education
* variable created for this study.
keep female age education_andalucia heduc data pr1 pr2 pr3 pr4 pr5 pr6 time_d code_enum slider id_data id_unique

save "02_ProcessedData\Data_And_clean.dta", replace


****Append
* Stacks the four cleaned study files into a single dataset.

cd "$directory"

* Load the Andalucia file explicitly as the master dataset, so this step does
* not depend on what an earlier section left in memory and can be rerun alone.
use "02_ProcessedData\Data_And_clean.dta", clear

append using "02_ProcessedData\Data_Nig1_clean.dta"
append using "02_ProcessedData\Data_Nig2_clean.dta"
append using "02_ProcessedData\Data_Hon_clean.dta"

* The study-specific question wording is replaced by generic labels q1-q6.
forvalues k=1(1)6{
lab var pr`k' "q`k'"
}
save "02_ProcessedData\Data_allstudies.dta", replace



